# ---- Load the study cohort and the risk-factor extracts ----
# Root of the cleaned EGFR extracts. Every input below lives underneath it, so
# a differently mounted share only needs to be changed here.
cleaned_dir <- "/Volumes/Shieh-share$/EGFR/cleaned"
risk_dir <- file.path(cleaned_dir, "Risk factors")

# study cohort
small <- read.csv(
  file.path(cleaned_dir, "Study cohort folder", "MCC_MELD_studycohort_20250129.csv")
)
# smoking final
smoking <- read.csv(file.path(risk_dir, "4_NLP_Smoking_baseline_20250322.csv"))
# egfr final
egfr <- read.csv(file.path(risk_dir, "Cohort_EGFR_FINAL.csv"))
# Prior history of cancer
ph <- read.csv(file.path(risk_dir, "2_CancerHistories_20250212.csv"))
# COPD, pulmonary nodule
copd <- read.csv(
  file.path(risk_dir, "3_Comorbidities_baseline_COPD_PN_20250212.csv")
)
# Sanity check on the freshly loaded tables: print each table's dimensions and
# its number of distinct pat_id values. When the row count equals the number
# of distinct IDs, the table holds one row per pat_id.
dim(small); length(unique(small$pat_id))
## [1] 9573 66
## [1] 9573
dim(smoking); length(unique(smoking$pat_id))
## [1] 9573 19
## [1] 9573
dim(egfr); length(unique(egfr$pat_id))
## [1] 9646 2
## [1] 9646
# Console output pasted by the author for reference (same values as above):
# > dim(small); length(unique(small$pat_id))
# [1] 9573 66
# [1] 9573
# > dim(smoking); length(unique(smoking$pat_id))
# [1] 9573 19
# [1] 9573
# > dim(egfr); length(unique(egfr$pat_id))
# [1] 9646 2
# [1] 9646
# Attach the smoking variables. merge() without all/all.x is an inner join, so
# only pat_ids present in both tables are kept.
small <- merge(small, smoking, by = "pat_id")
# Left join on the EGFR results: every cohort row is kept, and rows without an
# EGFR record get NA in the EGFR columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
small <- merge(small, egfr, by = "pat_id", all.x = TRUE)
# Row count and distinct-ID count should still match after both merges.
dim(small)
length(unique(small$pat_id))
## [1] 9573 85
## [1] 9573
# [1] 9573 85
# [1] 9573
# Keep only the cancer-history rows whose MP.status is "prior". which() drops
# rows where MP.status is NA, exactly as subset() does.
ph <- ph[which(ph$MP.status == "prior"), ]
# Rows vs. distinct patients: a patient may have more than one prior record.
dim(ph)
length(unique(ph$pat_id))
## [1] 402 11
## [1] 363
# [1] 402 11
# [1] 363
# Order by patient and then by date.MP, and keep the first row per patient.
# NOTE(review): if date.MP is read as text, this is a string sort and is only
# chronological for ISO-style (YYYY-MM-DD) dates -- confirm the format.
ph <- ph[with(ph, order(pat_id, date.MP)), ]
ph1 <- ph[!duplicated(ph$pat_id), ]
dim(ph1)
## [1] 363 11
# Left join the one-row-per-patient prior-cancer table; cohort members without
# a prior record keep NA in the ph1 columns (including MP.status).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
small <- merge(small, ph1, by = "pat_id", all.x = TRUE)
dim(small)
## [1] 9573 95
# Prior-history-of-cancer indicator: 1 when MP.status is "prior", 0 otherwise.
# A missing MP.status (no matching row in the prior-history table) counts as 0.
small$ph <- as.numeric(!is.na(small$MP.status) & small$MP.status == "prior")
# Sanity check before merging: dimensions of the comorbidity table and its
# number of distinct pat_id values.
dim(copd); length(unique(copd$pat_id))
## [1] 9573 5
## [1] 9573
# Attach the COPD / pulmonary-nodule columns (inner join on pat_id).
small <- merge(small, copd, by = "pat_id")

# Re-encode the indicator columns as factors (via character, so the factor
# levels are the printed values).
flag_cols <- c(
  "event.death", "chemo.ix", "surgery.ix", "radio.ix", "ph", "COPD", "PN"
)
small[flag_cols] <- lapply(
  small[flag_cols],
  function(col) as.factor(as.character(col))
)

# Collapse the two non-binary gender codes into "Female".
# NOTE(review): this assigns "Other (intersex, DSD)" and "Unknown" to Female;
# confirm this is the intended analytic decision.
small$Gender[small$Gender %in% c("Other (intersex, DSD)", "Unknown")] <- "Female"

# Three-level EGFR variable with sortable prefixes: Positive, Unknown, and
# everything else (labelled Negative/VUS). A missing EGFR_FINAL stays NA.
small$EGFRt <- ifelse(
  small$EGFR_FINAL == "Unknown", "3_Unknown",
  ifelse(small$EGFR_FINAL == "Positive", "1_Positive", "2_Negative/VUS")
)
# Summarise one column of `small`, stratified by small$Race, for Table 1.
#
# varname: name of the column of `small` to describe (character).
# digits:  passed through to getDescriptionStatsBy() as `digits`; default 1.
#
# Returns whatever getDescriptionStatsBy() returns for that column (presumably
# Gmisc::getDescriptionStatsBy -- the library() call is not visible here).
# Reads the global data frame `small` at call time.
getT1Stat <- function(varname, digits=1){
getDescriptionStatsBy(small[, varname],
small$Race,
add_total_col=TRUE,
show_all_values=TRUE,
hrzl_prop=FALSE,
statistics=TRUE,
html=TRUE,
digits=digits,
header_count = TRUE
)
}
# Table 1 rows: display label (name) -> column of `small` (value), in the
# order in which the sections appear in the table.
table1_vars <- c(
  "Gender" = "Gender",
  "Age at dx" = "Age_dx",
  "Hospital.Location" = "Hospital.Location",
  "event.death" = "event.death",
  "seer.stage" = "seer.stage",
  "ajcc.stage" = "ajcc.stage",
  "hist.ix" = "hist.ix",
  "chemo.ix" = "chemo.ix",
  "radio.ix" = "radio.ix",
  "surgery.ix" = "surgery.ix",
  "EGFR_Structure" = "EGFR_structure",
  "EGFR_Final" = "EGFR_FINAL",
  "EGFR targetable mutation" = "EGFRt",
  "Smoking status" = "smkstat1",
  "Smoking duration" = "dur1",
  "Smoking pack-years" = "py1",
  "Marital status" = "marry.epic",
  "FH Lung" = "FH_lung",
  "Prior history of cancer" = "ph",
  "COPD" = "COPD",
  "Pulmonary Nodule" = "PN"
)
# Get the basic stats: one getT1Stat() result per label, stored as a named list.
table_data <- lapply(table1_vars, getT1Stat)
# Assemble Table 1 from the per-variable summaries in `table_data`:
#   rgroup      - one label per variable (the list names)
#   n.rgroup    - number of rows each variable contributes
#   output_data - all summaries stacked row-wise
# These are built in one vectorised step instead of growing three objects
# inside a loop.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
# unname() keeps the list names from being passed to rbind() as argument names.
output_data <- do.call(rbind, unname(table_data))

# Render the stacked matrix; rgroup / n.rgroup tell htmlTable where each
# variable's group of rows starts and how long it is.
htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Table 1. Baseline characteristics by Race",
  ctable = TRUE
)
| Table 1. Baseline characteristics by Race | |||||||
|
Total No. 9,573 |
Asian No. 1,891 |
Black No. 1,310 |
Hispanic No. 705 |
Other No. 946 |
White No. 4,721 |
P-value | |
|---|---|---|---|---|---|---|---|
| Gender | |||||||
| Â Â Female | 5,033 (52.6%) | 827 (43.7%) | 740 (56.5%) | 354 (50.2%) | 496 (52.4%) | 2,616 (55.4%) | < 0.0001 |
| Â Â Male | 4,540 (47.4%) | 1,064 (56.3%) | 570 (43.5%) | 351 (49.8%) | 450 (47.6%) | 2,105 (44.6%) | |
| Age at dx | |||||||
| &nbsp;&nbsp;Mean (SD) | 69.5 (±11.2) | 67.9 (±11.6) | 67.6 (±11.3) | 67.7 (±11.6) | 67.0 (±11.5) | 71.4 (±10.6) | < 0.0001 |
| Hospital.Location | |||||||
| Â Â BMH | 1,964 (20.5%) | 286 (15.1%) | 651 (49.7%) | 176 (25.0%) | 77 (8.1%) | 774 (16.4%) | < 0.0001 |
| Â Â NYPQ | 1,329 (13.9%) | 688 (36.4%) | 103 (7.9%) | 112 (15.9%) | 34 (3.6%) | 392 (8.3%) | |
| Â Â WCM | 6,280 (65.6%) | 917 (48.5%) | 556 (42.4%) | 417 (59.1%) | 835 (88.3%) | 3,555 (75.3%) | |
| event.death | |||||||
| Â Â 0 | 6,485 (67.7%) | 1,444 (76.4%) | 775 (59.2%) | 450 (63.8%) | 813 (85.9%) | 3,003 (63.6%) | < 0.0001 |
| Â Â 1 | 3,088 (32.3%) | 447 (23.6%) | 535 (40.8%) | 255 (36.2%) | 133 (14.1%) | 1,718 (36.4%) | |
| seer.stage | |||||||
| Â Â 0. Insitu | 1,414 (14.8%) | 230 (12.2%) | 132 (10.1%) | 78 (11.1%) | 519 (54.9%) | 455 (9.6%) | < 0.0001 |
| Â Â 1. Localized | 2,643 (27.6%) | 503 (26.6%) | 254 (19.4%) | 154 (21.8%) | 136 (14.4%) | 1,596 (33.8%) | |
| Â Â 2. Regional | 1,873 (19.6%) | 351 (18.6%) | 276 (21.1%) | 162 (23.0%) | 84 (8.9%) | 1,000 (21.2%) | |
| Â Â 3. Distant | 3,015 (31.5%) | 669 (35.4%) | 528 (40.3%) | 266 (37.7%) | 137 (14.5%) | 1,415 (30.0%) | |
| Â Â 4. Unknown | 628 (6.6%) | 138 (7.3%) | 120 (9.2%) | 45 (6.4%) | 70 (7.4%) | 255 (5.4%) | |
| ajcc.stage | |||||||
| Â Â 0 | 1,415 (14.8%) | 231 (12.2%) | 132 (10.1%) | 78 (11.1%) | 519 (54.9%) | 455 (9.6%) | < 0.0001 |
| Â Â 1 | 2,648 (27.7%) | 528 (27.9%) | 232 (17.7%) | 162 (23.0%) | 127 (13.4%) | 1,599 (33.9%) | |
| Â Â 2 | 658 (6.9%) | 115 (6.1%) | 87 (6.6%) | 50 (7.1%) | 28 (3.0%) | 378 (8.0%) | |
| Â Â 3A | 862 (9.0%) | 149 (7.9%) | 140 (10.7%) | 72 (10.2%) | 50 (5.3%) | 451 (9.6%) | |
| Â Â 3B | 394 (4.1%) | 65 (3.4%) | 85 (6.5%) | 35 (5.0%) | 13 (1.4%) | 196 (4.2%) | |
| Â Â 4 | 2,699 (28.2%) | 605 (32.0%) | 475 (36.3%) | 242 (34.3%) | 125 (13.2%) | 1,252 (26.5%) | |
| Â Â Missing | 897 (9.4%) | 198 (10.5%) | 159 (12.1%) | 66 (9.4%) | 84 (8.9%) | 390 (8.3%) | |
| hist.ix | |||||||
| Â Â AD | 5,520 (57.7%) | 1,263 (66.8%) | 655 (50.0%) | 365 (51.8%) | 545 (57.6%) | 2,692 (57.0%) | < 0.0001 |
| Â Â LC | 90 (0.9%) | 12 (0.6%) | 18 (1.4%) | 6 (0.9%) | 11 (1.2%) | 43 (0.9%) | |
| Â Â NSCLC.NOS | 402 (4.2%) | 57 (3.0%) | 82 (6.3%) | 27 (3.8%) | 59 (6.2%) | 177 (3.7%) | |
| Â Â OTH | 1,065 (11.1%) | 160 (8.5%) | 155 (11.8%) | 90 (12.8%) | 81 (8.6%) | 579 (12.3%) | |
| Â Â SC | 689 (7.2%) | 82 (4.3%) | 122 (9.3%) | 71 (10.1%) | 79 (8.4%) | 335 (7.1%) | |
| Â Â SQ | 1,441 (15.1%) | 237 (12.5%) | 237 (18.1%) | 119 (16.9%) | 145 (15.3%) | 703 (14.9%) | |
| Â Â Missing | 366 (3.8%) | 80 (4.2%) | 41 (3.1%) | 27 (3.8%) | 26 (2.7%) | 192 (4.1%) | |
| chemo.ix | |||||||
| Â Â 0 | 4,075 (42.6%) | 751 (39.7%) | 505 (38.5%) | 306 (43.4%) | 179 (18.9%) | 2,334 (49.4%) | < 0.0001 |
| Â Â 1 | 2,801 (29.3%) | 621 (32.8%) | 459 (35.0%) | 232 (32.9%) | 91 (9.6%) | 1,398 (29.6%) | |
| Â Â Missing | 2,697 (28.2%) | 519 (27.4%) | 346 (26.4%) | 167 (23.7%) | 676 (71.5%) | 989 (20.9%) | |
| radio.ix | |||||||
| Â Â 0 | 4,439 (46.4%) | 866 (45.8%) | 553 (42.2%) | 309 (43.8%) | 201 (21.2%) | 2,510 (53.2%) | < 0.0001 |
| Â Â 1 | 961 (10.0%) | 128 (6.8%) | 196 (15.0%) | 78 (11.1%) | 21 (2.2%) | 538 (11.4%) | |
| Â Â Missing | 4,173 (43.6%) | 897 (47.4%) | 561 (42.8%) | 318 (45.1%) | 724 (76.5%) | 1,673 (35.4%) | |
| surgery.ix | |||||||
| Â Â 0 | 5,829 (60.9%) | 1,119 (59.2%) | 967 (73.8%) | 472 (67.0%) | 665 (70.3%) | 2,606 (55.2%) | < 0.0001 |
| Â Â 1 | 3,677 (38.4%) | 751 (39.7%) | 334 (25.5%) | 230 (32.6%) | 277 (29.3%) | 2,085 (44.2%) | |
| Â Â Missing | 67 (0.7%) | 21 (1.1%) | 9 (0.7%) | 3 (0.4%) | 4 (0.4%) | 30 (0.6%) | |
| EGFR_Structure | |||||||
| Â Â Negative | 1,757 (18.4%) | 230 (12.2%) | 242 (18.5%) | 155 (22.0%) | 174 (18.4%) | 956 (20.2%) | < 0.0001 |
| Â Â Positive | 695 (7.3%) | 287 (15.2%) | 52 (4.0%) | 45 (6.4%) | 78 (8.2%) | 233 (4.9%) | |
| Â Â Unknown | 7,058 (73.7%) | 1,355 (71.7%) | 1,006 (76.8%) | 500 (70.9%) | 686 (72.5%) | 3,511 (74.4%) | |
| Â Â VUS | 63 (0.7%) | 19 (1.0%) | 10 (0.8%) | 5 (0.7%) | 8 (0.8%) | 21 (0.4%) | |
| EGFR_Final | |||||||
| Â Â Negative | 2,384 (24.9%) | 350 (18.5%) | 285 (21.8%) | 195 (27.7%) | 228 (24.1%) | 1,326 (28.1%) | < 0.0001 |
| Â Â Positive | 1,092 (11.4%) | 455 (24.1%) | 65 (5.0%) | 61 (8.7%) | 101 (10.7%) | 410 (8.7%) | |
| Â Â Unknown | 6,085 (63.6%) | 1,083 (57.3%) | 957 (73.1%) | 449 (63.7%) | 615 (65.0%) | 2,981 (63.1%) | |
| Â Â VUS | 12 (0.1%) | 3 (0.2%) | 3 (0.2%) | 0 (0.0%) | 2 (0.2%) | 4 (0.1%) | |
| EGFR targetable mutation | |||||||
| Â Â 1_Positive | 1,092 (11.4%) | 455 (24.1%) | 65 (5.0%) | 61 (8.7%) | 101 (10.7%) | 410 (8.7%) | < 0.0001 |
| Â Â 2_Negative/VUS | 2,396 (25.0%) | 353 (18.7%) | 288 (22.0%) | 195 (27.7%) | 230 (24.3%) | 1,330 (28.2%) | |
| Â Â 3_Unknown | 6,085 (63.6%) | 1,083 (57.3%) | 957 (73.1%) | 449 (63.7%) | 615 (65.0%) | 2,981 (63.1%) | |
| Smoking status | |||||||
| Â Â Current | 1,214 (12.7%) | 327 (17.3%) | 164 (12.5%) | 109 (15.5%) | 132 (14.0%) | 482 (10.2%) | < 0.0001 |
| Â Â Former | 4,787 (50.0%) | 627 (33.2%) | 500 (38.2%) | 298 (42.3%) | 540 (57.1%) | 2,822 (59.8%) | |
| Â Â Never | 1,565 (16.3%) | 600 (31.7%) | 104 (7.9%) | 102 (14.5%) | 179 (18.9%) | 580 (12.3%) | |
| Â Â Unknown | 2,007 (21.0%) | 337 (17.8%) | 542 (41.4%) | 196 (27.8%) | 95 (10.0%) | 837 (17.7%) | |
| Smoking duration | |||||||
| &nbsp;&nbsp;Mean (SD) | 24.6 (±19.5) | 16.2 (±19.9) | 28.2 (±18.8) | 26.3 (±20.2) | 24.2 (±18.8) | 26.8 (±18.7) | < 0.0001 |
| &nbsp;&nbsp;Missing | 3,002 (31.4%) | 678 (35.9%) | 659 (50.3%) | 264 (37.4%) | 182 (19.2%) | 1,219 (25.8%) | |
| Smoking pack-years | |||||||
| &nbsp;&nbsp;Mean (SD) | 27.6 (±29.0) | 16.3 (±26.3) | 25.4 (±25.2) | 26.1 (±26.0) | 27.6 (±29.2) | 32.0 (±29.8) | < 0.0001 |
| &nbsp;&nbsp;Missing | 3,151 (32.9%) | 698 (36.9%) | 674 (51.5%) | 278 (39.4%) | 201 (21.2%) | 1,300 (27.5%) | |
| Marital status | |||||||
| Â Â Married | 4,636 (48.4%) | 1,287 (68.1%) | 344 (26.3%) | 261 (37.0%) | 436 (46.1%) | 2,308 (48.9%) | < 0.0001 |
| Â Â Other | 4,937 (51.6%) | 604 (31.9%) | 966 (73.7%) | 444 (63.0%) | 510 (53.9%) | 2,413 (51.1%) | |
| FH Lung | |||||||
| Â Â 0 | 871 (9.1%) | 88 (4.7%) | 103 (7.9%) | 79 (11.2%) | 71 (7.5%) | 530 (11.2%) | < 0.0001 |
| Â Â 1 | 1,030 (10.8%) | 137 (7.2%) | 70 (5.3%) | 55 (7.8%) | 117 (12.4%) | 651 (13.8%) | |
| Â Â NoEHR | 7,672 (80.1%) | 1,666 (88.1%) | 1,137 (86.8%) | 571 (81.0%) | 758 (80.1%) | 3,540 (75.0%) | |
| Prior history of cancer | |||||||
| Â Â 0 | 9,210 (96.2%) | 1,848 (97.7%) | 1,259 (96.1%) | 671 (95.2%) | 940 (99.4%) | 4,492 (95.1%) | < 0.0001 |
| Â Â 1 | 363 (3.8%) | 43 (2.3%) | 51 (3.9%) | 34 (4.8%) | 6 (0.6%) | 229 (4.9%) | |
| COPD | |||||||
| Â Â 0 | 8,155 (85.2%) | 1,793 (94.8%) | 1,044 (79.7%) | 571 (81.0%) | 855 (90.4%) | 3,892 (82.4%) | < 0.0001 |
| Â Â 1 | 1,418 (14.8%) | 98 (5.2%) | 266 (20.3%) | 134 (19.0%) | 91 (9.6%) | 829 (17.6%) | |
| Pulmonary Nodule | |||||||
| Â Â 0 | 7,064 (73.8%) | 1,432 (75.7%) | 1,015 (77.5%) | 506 (71.8%) | 806 (85.2%) | 3,305 (70.0%) | < 0.0001 |
| Â Â 1 | 2,509 (26.2%) | 459 (24.3%) | 295 (22.5%) | 199 (28.2%) | 140 (14.8%) | 1,416 (30.0%) | |
# Counts of EGFRt category by year of diagnosis; useNA = "always" adds an
# <NA> row and column so missing values in either variable are visible.
table(small$yr.dx, small$EGFRt, useNA="always")
##
## 1_Positive 2_Negative/VUS 3_Unknown <NA>
## 1988 0 0 1 0
## 1992 0 0 1 0
## 1993 0 0 1 0
## 1994 0 0 3 0
## 1996 0 1 3 0
## 1998 0 3 6 0
## 1999 1 2 5 0
## 2000 1 0 6 0
## 2001 0 4 9 0
## 2002 0 2 15 0
## 2003 1 2 19 0
## 2004 1 5 44 0
## 2005 3 3 50 0
## 2006 4 3 65 0
## 2007 3 7 63 0
## 2008 4 16 87 0
## 2009 10 18 112 0
## 2010 15 49 574 0
## 2011 30 62 558 0
## 2012 29 102 519 0
## 2013 73 174 419 0
## 2014 80 200 464 0
## 2015 98 178 528 0
## 2016 93 195 595 0
## 2017 75 169 567 0
## 2018 94 180 432 0
## 2019 110 200 437 0
## 2020 97 187 337 0
## 2021 70 165 52 0
## 2022 82 207 55 0
## 2023 81 171 40 0
## 2024 37 91 18 0
## <NA> 0 0 0 0
library(gmodels)
# Year of diagnosis x EGFRt with row proportions only: the table-total, column
# and chi-square-contribution proportions are switched off.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
CrossTable(
  small$yr.dx, small$EGFRt,
  prop.t = FALSE, prop.c = FALSE, prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 9573
##
##
## | small$EGFRt
## small$yr.dx | 1_Positive | 2_Negative/VUS | 3_Unknown | Row Total |
## -------------|----------------|----------------|----------------|----------------|
## 1988 | 0 | 0 | 1 | 1 |
## | 0.000 | 0.000 | 1.000 | 0.000 |
## -------------|----------------|----------------|----------------|----------------|
## 1992 | 0 | 0 | 1 | 1 |
## | 0.000 | 0.000 | 1.000 | 0.000 |
## -------------|----------------|----------------|----------------|----------------|
## 1993 | 0 | 0 | 1 | 1 |
## | 0.000 | 0.000 | 1.000 | 0.000 |
## -------------|----------------|----------------|----------------|----------------|
## 1994 | 0 | 0 | 3 | 3 |
## | 0.000 | 0.000 | 1.000 | 0.000 |
## -------------|----------------|----------------|----------------|----------------|
## 1996 | 0 | 1 | 3 | 4 |
## | 0.000 | 0.250 | 0.750 | 0.000 |
## -------------|----------------|----------------|----------------|----------------|
## 1998 | 0 | 3 | 6 | 9 |
## | 0.000 | 0.333 | 0.667 | 0.001 |
## -------------|----------------|----------------|----------------|----------------|
## 1999 | 1 | 2 | 5 | 8 |
## | 0.125 | 0.250 | 0.625 | 0.001 |
## -------------|----------------|----------------|----------------|----------------|
## 2000 | 1 | 0 | 6 | 7 |
## | 0.143 | 0.000 | 0.857 | 0.001 |
## -------------|----------------|----------------|----------------|----------------|
## 2001 | 0 | 4 | 9 | 13 |
## | 0.000 | 0.308 | 0.692 | 0.001 |
## -------------|----------------|----------------|----------------|----------------|
## 2002 | 0 | 2 | 15 | 17 |
## | 0.000 | 0.118 | 0.882 | 0.002 |
## -------------|----------------|----------------|----------------|----------------|
## 2003 | 1 | 2 | 19 | 22 |
## | 0.045 | 0.091 | 0.864 | 0.002 |
## -------------|----------------|----------------|----------------|----------------|
## 2004 | 1 | 5 | 44 | 50 |
## | 0.020 | 0.100 | 0.880 | 0.005 |
## -------------|----------------|----------------|----------------|----------------|
## 2005 | 3 | 3 | 50 | 56 |
## | 0.054 | 0.054 | 0.893 | 0.006 |
## -------------|----------------|----------------|----------------|----------------|
## 2006 | 4 | 3 | 65 | 72 |
## | 0.056 | 0.042 | 0.903 | 0.008 |
## -------------|----------------|----------------|----------------|----------------|
## 2007 | 3 | 7 | 63 | 73 |
## | 0.041 | 0.096 | 0.863 | 0.008 |
## -------------|----------------|----------------|----------------|----------------|
## 2008 | 4 | 16 | 87 | 107 |
## | 0.037 | 0.150 | 0.813 | 0.011 |
## -------------|----------------|----------------|----------------|----------------|
## 2009 | 10 | 18 | 112 | 140 |
## | 0.071 | 0.129 | 0.800 | 0.015 |
## -------------|----------------|----------------|----------------|----------------|
## 2010 | 15 | 49 | 574 | 638 |
## | 0.024 | 0.077 | 0.900 | 0.067 |
## -------------|----------------|----------------|----------------|----------------|
## 2011 | 30 | 62 | 558 | 650 |
## | 0.046 | 0.095 | 0.858 | 0.068 |
## -------------|----------------|----------------|----------------|----------------|
## 2012 | 29 | 102 | 519 | 650 |
## | 0.045 | 0.157 | 0.798 | 0.068 |
## -------------|----------------|----------------|----------------|----------------|
## 2013 | 73 | 174 | 419 | 666 |
## | 0.110 | 0.261 | 0.629 | 0.070 |
## -------------|----------------|----------------|----------------|----------------|
## 2014 | 80 | 200 | 464 | 744 |
## | 0.108 | 0.269 | 0.624 | 0.078 |
## -------------|----------------|----------------|----------------|----------------|
## 2015 | 98 | 178 | 528 | 804 |
## | 0.122 | 0.221 | 0.657 | 0.084 |
## -------------|----------------|----------------|----------------|----------------|
## 2016 | 93 | 195 | 595 | 883 |
## | 0.105 | 0.221 | 0.674 | 0.092 |
## -------------|----------------|----------------|----------------|----------------|
## 2017 | 75 | 169 | 567 | 811 |
## | 0.092 | 0.208 | 0.699 | 0.085 |
## -------------|----------------|----------------|----------------|----------------|
## 2018 | 94 | 180 | 432 | 706 |
## | 0.133 | 0.255 | 0.612 | 0.074 |
## -------------|----------------|----------------|----------------|----------------|
## 2019 | 110 | 200 | 437 | 747 |
## | 0.147 | 0.268 | 0.585 | 0.078 |
## -------------|----------------|----------------|----------------|----------------|
## 2020 | 97 | 187 | 337 | 621 |
## | 0.156 | 0.301 | 0.543 | 0.065 |
## -------------|----------------|----------------|----------------|----------------|
## 2021 | 70 | 165 | 52 | 287 |
## | 0.244 | 0.575 | 0.181 | 0.030 |
## -------------|----------------|----------------|----------------|----------------|
## 2022 | 82 | 207 | 55 | 344 |
## | 0.238 | 0.602 | 0.160 | 0.036 |
## -------------|----------------|----------------|----------------|----------------|
## 2023 | 81 | 171 | 40 | 292 |
## | 0.277 | 0.586 | 0.137 | 0.031 |
## -------------|----------------|----------------|----------------|----------------|
## 2024 | 37 | 91 | 18 | 146 |
## | 0.253 | 0.623 | 0.123 | 0.015 |
## -------------|----------------|----------------|----------------|----------------|
## Column Total | 1092 | 2396 | 6085 | 9573 |
## -------------|----------------|----------------|----------------|----------------|
##
##
# EGFR status known (1) vs. unknown (0): everything except "3_Unknown" is
# treated as having a known EGFR result.
small$EGFR_testing <- ifelse(small$EGFRt == "3_Unknown", 0, 1)
# Share of known EGFR status per year of diagnosis (row proportions only).
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
CrossTable(
  small$yr.dx, small$EGFR_testing,
  prop.t = FALSE, prop.c = FALSE, prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 9573
##
##
## | small$EGFR_testing
## small$yr.dx | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 1988 | 1 | 0 | 1 |
## | 1.000 | 0.000 | 0.000 |
## -------------|-----------|-----------|-----------|
## 1992 | 1 | 0 | 1 |
## | 1.000 | 0.000 | 0.000 |
## -------------|-----------|-----------|-----------|
## 1993 | 1 | 0 | 1 |
## | 1.000 | 0.000 | 0.000 |
## -------------|-----------|-----------|-----------|
## 1994 | 3 | 0 | 3 |
## | 1.000 | 0.000 | 0.000 |
## -------------|-----------|-----------|-----------|
## 1996 | 3 | 1 | 4 |
## | 0.750 | 0.250 | 0.000 |
## -------------|-----------|-----------|-----------|
## 1998 | 6 | 3 | 9 |
## | 0.667 | 0.333 | 0.001 |
## -------------|-----------|-----------|-----------|
## 1999 | 5 | 3 | 8 |
## | 0.625 | 0.375 | 0.001 |
## -------------|-----------|-----------|-----------|
## 2000 | 6 | 1 | 7 |
## | 0.857 | 0.143 | 0.001 |
## -------------|-----------|-----------|-----------|
## 2001 | 9 | 4 | 13 |
## | 0.692 | 0.308 | 0.001 |
## -------------|-----------|-----------|-----------|
## 2002 | 15 | 2 | 17 |
## | 0.882 | 0.118 | 0.002 |
## -------------|-----------|-----------|-----------|
## 2003 | 19 | 3 | 22 |
## | 0.864 | 0.136 | 0.002 |
## -------------|-----------|-----------|-----------|
## 2004 | 44 | 6 | 50 |
## | 0.880 | 0.120 | 0.005 |
## -------------|-----------|-----------|-----------|
## 2005 | 50 | 6 | 56 |
## | 0.893 | 0.107 | 0.006 |
## -------------|-----------|-----------|-----------|
## 2006 | 65 | 7 | 72 |
## | 0.903 | 0.097 | 0.008 |
## -------------|-----------|-----------|-----------|
## 2007 | 63 | 10 | 73 |
## | 0.863 | 0.137 | 0.008 |
## -------------|-----------|-----------|-----------|
## 2008 | 87 | 20 | 107 |
## | 0.813 | 0.187 | 0.011 |
## -------------|-----------|-----------|-----------|
## 2009 | 112 | 28 | 140 |
## | 0.800 | 0.200 | 0.015 |
## -------------|-----------|-----------|-----------|
## 2010 | 574 | 64 | 638 |
## | 0.900 | 0.100 | 0.067 |
## -------------|-----------|-----------|-----------|
## 2011 | 558 | 92 | 650 |
## | 0.858 | 0.142 | 0.068 |
## -------------|-----------|-----------|-----------|
## 2012 | 519 | 131 | 650 |
## | 0.798 | 0.202 | 0.068 |
## -------------|-----------|-----------|-----------|
## 2013 | 419 | 247 | 666 |
## | 0.629 | 0.371 | 0.070 |
## -------------|-----------|-----------|-----------|
## 2014 | 464 | 280 | 744 |
## | 0.624 | 0.376 | 0.078 |
## -------------|-----------|-----------|-----------|
## 2015 | 528 | 276 | 804 |
## | 0.657 | 0.343 | 0.084 |
## -------------|-----------|-----------|-----------|
## 2016 | 595 | 288 | 883 |
## | 0.674 | 0.326 | 0.092 |
## -------------|-----------|-----------|-----------|
## 2017 | 567 | 244 | 811 |
## | 0.699 | 0.301 | 0.085 |
## -------------|-----------|-----------|-----------|
## 2018 | 432 | 274 | 706 |
## | 0.612 | 0.388 | 0.074 |
## -------------|-----------|-----------|-----------|
## 2019 | 437 | 310 | 747 |
## | 0.585 | 0.415 | 0.078 |
## -------------|-----------|-----------|-----------|
## 2020 | 337 | 284 | 621 |
## | 0.543 | 0.457 | 0.065 |
## -------------|-----------|-----------|-----------|
## 2021 | 52 | 235 | 287 |
## | 0.181 | 0.819 | 0.030 |
## -------------|-----------|-----------|-----------|
## 2022 | 55 | 289 | 344 |
## | 0.160 | 0.840 | 0.036 |
## -------------|-----------|-----------|-----------|
## 2023 | 40 | 252 | 292 |
## | 0.137 | 0.863 | 0.031 |
## -------------|-----------|-----------|-----------|
## 2024 | 18 | 128 | 146 |
## | 0.123 | 0.877 | 0.015 |
## -------------|-----------|-----------|-----------|
## Column Total | 6085 | 3488 | 9573 |
## -------------|-----------|-----------|-----------|
##
##
# Per diagnosis year: proportion of cases with a known EGFR status
# (mean of the 0/1 EGFR_testing flag) and the number of cases.
summary_df <- small %>%
  group_by(yr.dx) %>%
  summarise(
    proportion = mean(EGFR_testing),
    count = n()
  )
summary_df <- data.frame(summary_df)
# Restrict the plot to diagnosis years from 2003 onwards.
summary_df1 <- subset(summary_df, yr.dx >= 2003)

# Create line plot
# `linewidth` replaces `size` for lines: `size` on line geoms is deprecated
# since ggplot2 3.4.0. Points still use `size`.
ggplot(summary_df1, aes(x = yr.dx, y = proportion)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue", size = 2) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    limits = c(0, 1)
  ) +
  labs(
    # title = "Known EGFR status (out of total cases) by year of lung cancer diagnosis",
    x = "Year of lung cancer diagnosis",
    y = "Proportion of Known EGFR status (out of total lung cancer)"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Replace missing ysq1 with the sentinel 9999 so the comparisons below never
# yield NA.
# NOTE(review): ysq1 keeps the 9999 values afterwards; any later numeric
# summary of ysq1 must exclude them.
small$ysq1 <- replace(small$ysq1, is.na(small$ysq1), 9999)

# Five-level smoking status with sortable prefixes. Former smokers are split
# on ysq1: above 15 (and not the sentinel) is "2_LongQuit"; all other former
# smokers, including those with unknown ysq1, are "3_RecentQuit".
small$smkstat2 <- with(small, {
  former <- smkstat1 == "Former"
  long_quit <- former & ysq1 > 15 & ysq1 != 9999
  ifelse(
    smkstat1 == "Never", "1_Never",
    ifelse(
      long_quit, "2_LongQuit",
      ifelse(
        former, "3_RecentQuit",
        ifelse(smkstat1 == "Current", "4_Current", "5_Unknown")
      )
    )
  )
})
table(small$smkstat2, useNA = "always")
##
## 1_Never 2_LongQuit 3_RecentQuit 4_Current 5_Unknown <NA>
## 1565 1566 3221 1214 2007 0
# 1_Never 2_LongQuit 3_RecentQuit 4_Current 5_Unknown <NA>
# 1565 1566 3221 1214 2007 0
# Cross-tabulate the five-level smoking status against the EGFRt category.
table(small$smkstat2, small$EGFRt)
##
## 1_Positive 2_Negative/VUS 3_Unknown
## 1_Never 521 371 673
## 2_LongQuit 210 584 772
## 3_RecentQuit 246 1110 1865
## 4_Current 84 290 840
## 5_Unknown 31 41 1935
# 1_Positive 2_Negative/VUS 3_Unknown
# 1_Never 521 371 673
# 2_LongQuit 210 584 772
# 3_RecentQuit 246 1110 1865
# 4_Current 84 290 840
# 5_Unknown 31 41 1935
# Binary EGFR flag as a factor: "1" when EGFRt is "1_Positive", "0" otherwise.
small$EGFRt1 <- as.factor(as.character(
  ifelse(small$EGFRt == "1_Positive", 1, 0)
))

# EGFRt1 by sex, full cohort (`small`).
# getT1Stat() is redefined so that the grouping column is small$Gender.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small[, varname],
    small$Gender,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Sex",
  ctable = TRUE
)
| Figure 3 Sources by Sex | ||||
|
Total No. 9,573 |
Female No. 5,033 |
Male No. 4,540 |
P-value | |
|---|---|---|---|---|
| EGFR Status | ||||
| Â Â 0 | 8,481 (88.6%) | 4,309 (85.6%) | 4,172 (91.9%) | < 0.0001 |
| Â Â 1 | 1,092 (11.4%) | 724 (14.4%) | 368 (8.1%) | |
# EGFRt1 by five-level smoking status, full cohort (`small`).
# getT1Stat() is redefined so that the grouping column is small$smkstat2.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small[, varname],
    small$smkstat2,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by SMK",
  ctable = TRUE
)
| Figure 3 Sources by SMK | |||||||
|
Total No. 9,573 |
1_Never No. 1,565 |
2_LongQuit No. 1,566 |
3_RecentQuit No. 3,221 |
4_Current No. 1,214 |
5_Unknown No. 2,007 |
P-value | |
|---|---|---|---|---|---|---|---|
| EGFR Status | |||||||
| Â Â 0 | 8,481 (88.6%) | 1,044 (66.7%) | 1,356 (86.6%) | 2,975 (92.4%) | 1,130 (93.1%) | 1,976 (98.5%) | < 0.0001 |
| Â Â 1 | 1,092 (11.4%) | 521 (33.3%) | 210 (13.4%) | 246 (7.6%) | 84 (6.9%) | 31 (1.5%) | |
# EGFRt1 by race, full cohort (`small`).
# getT1Stat() is redefined so that the grouping column is small$Race.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small[, varname],
    small$Race,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Race",
  ctable = TRUE
)
| Figure 3 Sources by Race | |||||||
|
Total No. 9,573 |
Asian No. 1,891 |
Black No. 1,310 |
Hispanic No. 705 |
Other No. 946 |
White No. 4,721 |
P-value | |
|---|---|---|---|---|---|---|---|
| EGFR Status | |||||||
| Â Â 0 | 8,481 (88.6%) | 1,436 (75.9%) | 1,245 (95.0%) | 644 (91.3%) | 845 (89.3%) | 4,311 (91.3%) | < 0.0001 |
| Â Â 1 | 1,092 (11.4%) | 455 (24.1%) | 65 (5.0%) | 61 (8.7%) | 101 (10.7%) | 410 (8.7%) | |
# EGFRt1 by hospital location, full cohort (`small`).
# getT1Stat() is redefined so that the grouping column is
# small$Hospital.Location.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small[, varname],
    small$Hospital.Location,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Hospital",
  ctable = TRUE
)
| Figure 3 Sources by Hospital | |||||
|
Total No. 9,573 |
BMH No. 1,964 |
NYPQ No. 1,329 |
WCM No. 6,280 |
P-value | |
|---|---|---|---|---|---|
| EGFR Status | |||||
| Â Â 0 | 8,481 (88.6%) | 1,905 (97.0%) | 1,169 (88.0%) | 5,407 (86.1%) | < 0.0001 |
| Â Â 1 | 1,092 (11.4%) | 59 (3.0%) | 160 (12.0%) | 873 (13.9%) | |
# Restrict to patients whose EGFR_FINAL is not "Unknown". subset() also drops
# rows where EGFR_FINAL is NA.
small1 <- subset(small, EGFR_FINAL != "Unknown")

# EGFRt1 by sex within `small1`.
# getT1Stat() is redefined to read from small1 and group by small1$Gender.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small1[, varname],
    small1$Gender,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Sex",
  ctable = TRUE
)
| Figure 3 Sources by Sex | ||||
|
Total No. 3,488 |
Female No. 1,940 |
Male No. 1,548 |
P-value | |
|---|---|---|---|---|
| EGFR Status | ||||
| Â Â 0 | 2,396 (68.7%) | 1,216 (62.7%) | 1,180 (76.2%) | < 0.0001 |
| Â Â 1 | 1,092 (31.3%) | 724 (37.3%) | 368 (23.8%) | |
# EGFRt1 by five-level smoking status within `small1`.
# getT1Stat() is redefined to read from small1 and group by small1$smkstat2.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small1[, varname],
    small1$smkstat2,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by SMK",
  ctable = TRUE
)
| Figure 3 Sources by SMK | |||||||
|
Total No. 3,488 |
1_Never No. 892 |
2_LongQuit No. 794 |
3_RecentQuit No. 1,356 |
4_Current No. 374 |
5_Unknown No. 72 |
P-value | |
|---|---|---|---|---|---|---|---|
| EGFR Status | |||||||
| Â Â 0 | 2,396 (68.7%) | 371 (41.6%) | 584 (73.6%) | 1,110 (81.9%) | 290 (77.5%) | 41 (56.9%) | < 0.0001 |
| Â Â 1 | 1,092 (31.3%) | 521 (58.4%) | 210 (26.4%) | 246 (18.1%) | 84 (22.5%) | 31 (43.1%) | |
# EGFRt1 by race within `small1`.
# getT1Stat() is redefined to read from small1 and group by small1$Race.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small1[, varname],
    small1$Race,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Race",
  ctable = TRUE
)
| Figure 3 Sources by Race | |||||||
|
Total No. 3,488 |
Asian No. 808 |
Black No. 353 |
Hispanic No. 256 |
Other No. 331 |
White No. 1,740 |
P-value | |
|---|---|---|---|---|---|---|---|
| EGFR Status | |||||||
| Â Â 0 | 2,396 (68.7%) | 353 (43.7%) | 288 (81.6%) | 195 (76.2%) | 230 (69.5%) | 1,330 (76.4%) | < 0.0001 |
| Â Â 1 | 1,092 (31.3%) | 455 (56.3%) | 65 (18.4%) | 61 (23.8%) | 101 (30.5%) | 410 (23.6%) | |
# EGFRt1 by hospital location within `small1`.
# getT1Stat() is redefined to read from small1 and group by
# small1$Hospital.Location.
getT1Stat <- function(varname, digits = 1) {
  getDescriptionStatsBy(
    small1[, varname],
    small1$Hospital.Location,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}

# Get the basic stats: one named entry per table section.
table_data <- list("EGFR Status" = getT1Stat("EGFRt1"))

# Section labels, rows per section, and the stacked summary matrix.
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)
output_data <- do.call(rbind, unname(table_data))

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Hospital",
  ctable = TRUE
)
**Figure 3 Sources by Hospital**

| | Total No. 3,488 | BMH No. 204 | NYPQ No. 387 | WCM No. 2,897 | P-value |
|---|---|---|---|---|---|
| **EGFR Status** | | | | | |
| &nbsp;&nbsp;0 | 2,396 (68.7%) | 145 (71.1%) | 227 (58.7%) | 2,024 (69.9%) | < 0.0001 |
| &nbsp;&nbsp;1 | 1,092 (31.3%) | 59 (28.9%) | 160 (41.3%) | 873 (30.1%) | |

# EGFR indicator on the full cohort, stored as a factor: "1" when
# EGFR_structure equals "Positive", "0" for any other non-missing value
# (missing values stay NA).
small$EGFRt2 <- factor(
  ifelse(small$EGFR_structure == "Positive", "1", "0")
)
# Build one Table-1 style section: the distribution of one column, split by a
# grouping variable, with a total column and a P-value column.
#
# Args:
#   varname: name (single string) of the column in `data` to summarise.
#   digits:  number of decimals, forwarded to getDescriptionStatsBy().
#   data:    data frame holding `varname`; defaults to the global `small`.
#   by:      grouping vector with one entry per row of `data`; defaults to
#            the Gender column of `data`.
#
# Returns:
#   The value returned by getDescriptionStatsBy().
getT1Stat <- function(varname, digits = 1, data = small, by = data$Gender) {
  # `[[` returns NULL for an unknown column instead of failing, so check the
  # name up front to keep a misspelled variable a hard error.
  stopifnot(
    is.character(varname),
    length(varname) == 1L,
    varname %in% names(data)
  )
  getDescriptionStatsBy(
    data[[varname]],
    by,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}
# Sections of the table, keyed by the row-group label shown in the output.
table_data <- list(
  "EGFR Status" = getT1Stat("EGFRt2")
)

# Stack the sections top to bottom, and record each section's label and
# row count so htmlTable() can draw the row-group headers.
output_data <- Reduce(rbind, table_data, NULL)
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Sex",
  ctable = TRUE
)
**Figure 3 Sources by Sex**

| | Total No. 9,573 | Female No. 5,033 | Male No. 4,540 | P-value |
|---|---|---|---|---|
| **EGFR Status** | | | | |
| &nbsp;&nbsp;0 | 8,878 (92.7%) | 4,555 (90.5%) | 4,323 (95.2%) | < 0.0001 |
| &nbsp;&nbsp;1 | 695 (7.3%) | 478 (9.5%) | 217 (4.8%) | |

# Build one Table-1 style section: the distribution of one column, split by a
# grouping variable, with a total column and a P-value column.
#
# Args:
#   varname: name (single string) of the column in `data` to summarise.
#   digits:  number of decimals, forwarded to getDescriptionStatsBy().
#   data:    data frame holding `varname`; defaults to the global `small`.
#   by:      grouping vector with one entry per row of `data`; defaults to
#            the smkstat2 column of `data`.
#
# Returns:
#   The value returned by getDescriptionStatsBy().
getT1Stat <- function(varname, digits = 1, data = small, by = data$smkstat2) {
  # `[[` returns NULL for an unknown column instead of failing, so check the
  # name up front to keep a misspelled variable a hard error.
  stopifnot(
    is.character(varname),
    length(varname) == 1L,
    varname %in% names(data)
  )
  getDescriptionStatsBy(
    data[[varname]],
    by,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}
# Sections of the table, keyed by the row-group label shown in the output.
table_data <- list(
  "EGFR Status" = getT1Stat("EGFRt2")
)

# Stack the sections top to bottom, and record each section's label and
# row count so htmlTable() can draw the row-group headers.
output_data <- Reduce(rbind, table_data, NULL)
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by SMK",
  ctable = TRUE
)
**Figure 3 Sources by SMK**

| | Total No. 9,573 | 1_Never No. 1,565 | 2_LongQuit No. 1,566 | 3_RecentQuit No. 3,221 | 4_Current No. 1,214 | 5_Unknown No. 2,007 | P-value |
|---|---|---|---|---|---|---|---|
| **EGFR Status** | | | | | | | |
| &nbsp;&nbsp;0 | 8,878 (92.7%) | 1,192 (76.2%) | 1,437 (91.8%) | 3,072 (95.4%) | 1,170 (96.4%) | 2,007 (100.0%) | < 0.0001 |
| &nbsp;&nbsp;1 | 695 (7.3%) | 373 (23.8%) | 129 (8.2%) | 149 (4.6%) | 44 (3.6%) | 0 (0.0%) | |

# Build one Table-1 style section: the distribution of one column, split by a
# grouping variable, with a total column and a P-value column.
#
# Args:
#   varname: name (single string) of the column in `data` to summarise.
#   digits:  number of decimals, forwarded to getDescriptionStatsBy().
#   data:    data frame holding `varname`; defaults to the global `small`.
#   by:      grouping vector with one entry per row of `data`; defaults to
#            the Race column of `data`.
#
# Returns:
#   The value returned by getDescriptionStatsBy().
getT1Stat <- function(varname, digits = 1, data = small, by = data$Race) {
  # `[[` returns NULL for an unknown column instead of failing, so check the
  # name up front to keep a misspelled variable a hard error.
  stopifnot(
    is.character(varname),
    length(varname) == 1L,
    varname %in% names(data)
  )
  getDescriptionStatsBy(
    data[[varname]],
    by,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}
# Sections of the table, keyed by the row-group label shown in the output.
table_data <- list(
  "EGFR Status" = getT1Stat("EGFRt2")
)

# Stack the sections top to bottom, and record each section's label and
# row count so htmlTable() can draw the row-group headers.
output_data <- Reduce(rbind, table_data, NULL)
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Race",
  ctable = TRUE
)
**Figure 3 Sources by Race**

| | Total No. 9,573 | Asian No. 1,891 | Black No. 1,310 | Hispanic No. 705 | Other No. 946 | White No. 4,721 | P-value |
|---|---|---|---|---|---|---|---|
| **EGFR Status** | | | | | | | |
| &nbsp;&nbsp;0 | 8,878 (92.7%) | 1,604 (84.8%) | 1,258 (96.0%) | 660 (93.6%) | 868 (91.8%) | 4,488 (95.1%) | < 0.0001 |
| &nbsp;&nbsp;1 | 695 (7.3%) | 287 (15.2%) | 52 (4.0%) | 45 (6.4%) | 78 (8.2%) | 233 (4.9%) | |

# Build one Table-1 style section: the distribution of one column, split by a
# grouping variable, with a total column and a P-value column.
#
# Args:
#   varname: name (single string) of the column in `data` to summarise.
#   digits:  number of decimals, forwarded to getDescriptionStatsBy().
#   data:    data frame holding `varname`; defaults to the global `small`.
#   by:      grouping vector with one entry per row of `data`; defaults to
#            the Hospital.Location column of `data`.
#
# Returns:
#   The value returned by getDescriptionStatsBy().
getT1Stat <- function(varname, digits = 1, data = small,
                      by = data$Hospital.Location) {
  # `[[` returns NULL for an unknown column instead of failing, so check the
  # name up front to keep a misspelled variable a hard error.
  stopifnot(
    is.character(varname),
    length(varname) == 1L,
    varname %in% names(data)
  )
  getDescriptionStatsBy(
    data[[varname]],
    by,
    add_total_col = TRUE,
    show_all_values = TRUE,
    hrzl_prop = FALSE,
    statistics = TRUE,
    html = TRUE,
    digits = digits,
    header_count = TRUE
  )
}
# Sections of the table, keyed by the row-group label shown in the output.
table_data <- list(
  "EGFR Status" = getT1Stat("EGFRt2")
)

# Stack the sections top to bottom, and record each section's label and
# row count so htmlTable() can draw the row-group headers.
output_data <- Reduce(rbind, table_data, NULL)
rgroup <- names(table_data)
n.rgroup <- vapply(table_data, nrow, integer(1), USE.NAMES = FALSE)

htmlTable(
  output_data,
  align = "rrrr",
  rgroup = rgroup,
  n.rgroup = n.rgroup,
  rgroupCSSseparator = "",
  rowlabel = "",
  caption = "Figure 3 Sources by Hospital",
  ctable = TRUE
)
**Figure 3 Sources by Hospital**

| | Total No. 9,573 | BMH No. 1,964 | NYPQ No. 1,329 | WCM No. 6,280 | P-value |
|---|---|---|---|---|---|
| **EGFR Status** | | | | | |
| &nbsp;&nbsp;0 | 8,878 (92.7%) | 1,908 (97.1%) | 1,227 (92.3%) | 5,743 (91.4%) | < 0.0001 |
| &nbsp;&nbsp;1 | 695 (7.3%) | 56 (2.9%) | 102 (7.7%) | 537 (8.6%) | |

# Left-pad codes with zeros up to `width` characters. Values that are already
# `width` characters or longer, and missing values, are returned unchanged
# (as character).
# NOTE(review): presumably the leading zeros were lost because read.csv()
# parsed the all-digit codes as numbers -- confirm against the CSV.
pad_fips <- function(code, width) {
  code <- as.character(code)
  n_missing <- width - nchar(code)
  needs_pad <- !is.na(n_missing) & n_missing > 0
  code[needs_pad] <- paste0(
    strrep("0", n_missing[needs_pad]),
    code[needs_pad]
  )
  code
}

# Area-level data, joined to the study cohort on patient id (inner join:
# patients without a row in the area file are dropped from geo2).
geo <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/1_AreaData_20250212.csv")
geo2 <- merge(small, geo, by = "pat_id")
geo2$lat <- as.numeric(as.character(geo2$lat))
geo2$long <- as.numeric(as.character(geo2$long))

# County code: 3 digits. Pad every short value, not just 2-character ones,
# so single-digit counties (e.g. 5 -> "005") are handled too.
geo2$county <- pad_fips(geo2$county, 3)
geo2$FIPS_blockg <- pad_fips(geo2$FIPS, 12) # Block group (12 digits)
geo2$FIPS_tract <- pad_fips(geo2$GEOID, 11) # Census tract (11 digits)
# County-level FIPS = 2-digit state + 3-digit county, i.e. the first five
# characters of the zero-padded tract code.
geo2$FIPS_county <- substr(geo2$FIPS_tract, 1, 5)
# Maps
# County FIPS codes (within New York State) for the MCC catchment area:
#   061 = New York County (Manhattan)
#   047 = Kings County (Brooklyn)
#   081 = Queens County
nyc_counties <- c("061", "047", "081")

# County outlines for the same three counties.
nyc_map <- map_data("county") %>%
  filter(
    region == "new york" & subregion %in% c("new york", "kings", "queens")
  )

# Cartographic-boundary census tracts for the catchment counties.
# progress_bar = FALSE keeps the download progress output out of the
# rendered report.
tracts <- tigris::tracts(
  state = "NY",
  county = nyc_counties,
  cb = TRUE,
  year = 2023,
  progress_bar = FALSE
)
## | | | 0% | | | 1% | |= | 1% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 9% | |======= | 10% | |======= | 11% | |======== | 12% | |========= | 13% | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17% | |============ | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |=============== | 22% | |================ | 23% | |================= | 24% | |================= | 25% | |================== | 25% | |================== | 26% | |=================== | 27% | |==================== | 28% | |==================== | 29% | |===================== | 30% | |===================== | 31% | |====================== | 31% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |======================== | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |=========================== | 39% | |============================ | 40% | |============================= | 41% | |============================= | 42% | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |================================ | 45% | |================================ | 46% | |================================= | 47% | |================================= | 48% | |================================== | 48% | |================================== | 49% | |=================================== | 50% | |==================================== | 51% | |==================================== | 52% | |===================================== | 53% | |===================================== | 54% | |====================================== | 54% | |======================================= | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | 
|========================================= | 59% | |========================================== | 59% | |========================================== | 60% | |=========================================== | 61% | |=========================================== | 62% | |============================================ | 63% | |============================================= | 64% | |============================================== | 65% | |============================================== | 66% | |=============================================== | 67% | |================================================ | 68% | |================================================ | 69% | |================================================= | 70% | |================================================== | 71% | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |======================================================= | 78% | |======================================================= | 79% | |======================================================== | 80% | |======================================================== | 81% | |========================================================= | 81% | |========================================================== | 82% | |========================================================== | 83% | |=========================================================== | 84% | |=========================================================== | 85% | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | 
|============================================================== | 89% | |=============================================================== | 90% | |================================================================ | 91% | |================================================================ | 92% | |================================================================= | 92% | |================================================================= | 93% | |================================================================== | 94% | |=================================================================== | 95% | |=================================================================== | 96% | |==================================================================== | 97% | |==================================================================== | 98% | |===================================================================== | 98% | |======================================================================| 99% | |======================================================================| 100%
# Cartographic county boundaries for New York State, restricted to the
# catchment counties listed in `nyc_counties`. progress_bar = FALSE keeps
# the download progress output out of the rendered report.
counties <- tigris::counties(
  state = "NY",
  cb = TRUE,
  year = 2023,
  progress_bar = FALSE
) %>%
  filter(COUNTYFP %in% nyc_counties) # Filter for NYC counties
## Downloading: 16 kB Downloading: 16 kB Downloading: 48 kB Downloading: 48 kB Downloading: 64 kB Downloading: 64 kB Downloading: 66 kB Downloading: 66 kB Downloading: 82 kB Downloading: 82 kB Downloading: 110 kB Downloading: 110 kB Downloading: 130 kB Downloading: 130 kB Downloading: 150 kB Downloading: 150 kB Downloading: 160 kB Downloading: 160 kB Downloading: 180 kB Downloading: 180 kB Downloading: 200 kB Downloading: 200 kB Downloading: 210 kB Downloading: 210 kB Downloading: 230 kB Downloading: 230 kB Downloading: 240 kB Downloading: 240 kB Downloading: 260 kB Downloading: 260 kB Downloading: 280 kB Downloading: 280 kB Downloading: 290 kB Downloading: 290 kB Downloading: 310 kB Downloading: 310 kB Downloading: 330 kB Downloading: 330 kB Downloading: 340 kB Downloading: 340 kB Downloading: 360 kB Downloading: 360 kB Downloading: 380 kB Downloading: 380 kB Downloading: 390 kB Downloading: 390 kB Downloading: 410 kB Downloading: 410 kB Downloading: 420 kB Downloading: 420 kB Downloading: 440 kB Downloading: 440 kB Downloading: 460 kB Downloading: 460 kB Downloading: 470 kB Downloading: 470 kB Downloading: 490 kB Downloading: 490 kB Downloading: 510 kB Downloading: 510 kB Downloading: 520 kB Downloading: 520 kB Downloading: 540 kB Downloading: 540 kB Downloading: 560 kB Downloading: 560 kB Downloading: 570 kB Downloading: 570 kB Downloading: 590 kB Downloading: 590 kB Downloading: 610 kB Downloading: 610 kB Downloading: 620 kB Downloading: 620 kB Downloading: 640 kB Downloading: 640 kB Downloading: 650 kB Downloading: 650 kB Downloading: 670 kB Downloading: 670 kB Downloading: 690 kB Downloading: 690 kB Downloading: 700 kB Downloading: 700 kB Downloading: 720 kB Downloading: 720 kB Downloading: 740 kB Downloading: 740 kB Downloading: 750 kB Downloading: 750 kB Downloading: 770 kB Downloading: 770 kB Downloading: 790 kB Downloading: 790 kB Downloading: 800 kB Downloading: 800 kB Downloading: 820 kB Downloading: 820 kB Downloading: 830 kB Downloading: 830 kB 
Downloading: 850 kB Downloading: 850 kB Downloading: 870 kB Downloading: 870 kB Downloading: 880 kB Downloading: 880 kB Downloading: 900 kB Downloading: 900 kB Downloading: 920 kB Downloading: 920 kB Downloading: 930 kB Downloading: 930 kB Downloading: 950 kB Downloading: 950 kB Downloading: 970 kB Downloading: 970 kB Downloading: 980 kB Downloading: 980 kB Downloading: 1,000 kB Downloading: 1,000 kB Downloading: 1 MB Downloading: 1 MB Downloading: 1 MB Downloading: 1 MB Downloading: 1 MB Downloading: 1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.1 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.2 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.3 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.4 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.5 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.6 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB 
Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.7 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.8 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 1.9 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.1 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.2 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.3 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.4 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB 
Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.5 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.6 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.7 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.8 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 2.9 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.1 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.2 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.3 MB 
Downloading: 3.3 MB Downloading: 3.3 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.4 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.5 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.6 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.7 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.8 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 3.9 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.1 MB Downloading: 4.2 MB 
Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.2 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.3 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.4 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.5 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.6 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.7 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.8 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 4.9 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB 
Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.1 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.2 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.3 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.4 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.5 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.6 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.7 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 
MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.8 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 5.9 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.1 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.2 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.3 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.4 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.5 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB 
Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.6 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.7 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.8 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 6.9 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.1 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.2 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.3 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.4 MB Downloading: 7.5 MB Downloading: 7.5 MB 
Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.5 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.6 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.7 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.8 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 7.9 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.1 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.2 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB 
Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.3 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.4 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.5 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.6 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.7 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.8 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 8.9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB 
Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.1 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.2 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.3 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.4 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.5 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.6 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.7 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.8 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 9.9 MB 
Downloading: 9.9 MB Downloading: 9.9 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 10 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB 
Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 11 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB Downloading: 12 MB
# Patient-level point layers restricted to state code "36" and the counties in
# `nyc_counties`, split on EGFR_FINAL. As with subset(), rows whose condition
# evaluates to NA are dropped (which() discards them).
# points1: EGFR_FINAL is "Positive".
points1 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL == "Positive")),
]
# points2: EGFR_FINAL is any other non-missing value.
points2 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL != "Positive")),
]
########################################################
##### Tract-level lung cancer (LC) rate, all MCC-MELD patients
# Numerator: number of cohort rows per census tract (geo2$FIPS_tract).
a <- table(geo2$FIPS_tract)
b <- data.frame(a)
names(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c[["GEOID_Tract"]] <- as.character(c[["GEOID_Tract"]])

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state     = "NY",
  county    = c("061", "047", "081"),
  year      = 2020,
  sumfile   = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
## Note: 2020 decennial Census data use differential privacy, a technique that
## introduces errors into data to preserve respondent confidentiality.
## Small counts should be interpreted with caution.
## See https://www.census.gov/library/fact-sheets/2021/protecting-the-confidentiality-of-the-2020-census-redistricting-data.html for additional guidance.
## This message is displayed once per session.

# Keep columns 1 and 4 of the Census result and rename them
# (presumably the tract GEOID and the P1_001N value -- confirm).
pop <- data.frame(pop_data)
pop1 <- setNames(pop[, c(1, 4)], c("GEOID_Tract", "Pop"))

# all.y = TRUE keeps every Census tract; tracts without cohort patients get
# Current_Count = NA.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 2883 2
## [1] 1840 2
## [1] 1840 3

# Crude rate per 100,000. Tracts with Pop < 1000 are not estimated, and tracts
# with a missing rate (no patients) are shown as 0; NA was also tried but
# looked confusing on the map.
pop2$LCrate <- round(pop2$Current_Count / pop2$Pop * 1e5, 1)
pop2$LCrate[pop2$Pop < 1000 | is.na(pop2$LCrate)] <- 0
summary(pop2$LCrate) # per 100,000
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- left_join(tracts, geo3, by = "ID")

# Cap rates at 500 so the colour scale (limits 0-500) is not dominated by
# outliers; missing rates stay missing.
merged_data$LCrate <- pmin(merged_data$LCrate, 500)

ggplot(data = merged_data) +
  geom_sf(aes(fill = LCrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "rocket", name = "Rates", limits = c(0, 500)) +
  theme_minimal() +
  labs(
    title = "LC rates (per 100,000)",
    subtitle = "Source: MCC-MELD (Numerator) and Census (Denominator)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "rocket", name = "LC rate/100,000", limits=c(0,500)) + # Gradient color scale
# theme_minimal() +
# # geom_point(data = points2, aes(x = long, y = lat), color = "red", size = 0.08, alpha = 0.7) +
# # geom_point(data = points1, aes(x = long, y = lat), color = "green", size = 0.2, alpha = 0.7) +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
# Patient-level point layers (state "36", counties in `nyc_counties`), split on
# EGFR_FINAL; rows whose condition is NA are dropped, as subset() would do.
points1 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL == "Positive")),
]
points2 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL != "Positive")),
]
########################################################
##### Tract-level rate of EGFR-positive patients
# egfrp: 1 when EGFR_structure is "Positive", 0 otherwise, NA when missing.
# NOTE(review): the point layers above use EGFR_FINAL while this flag uses
# EGFR_structure -- confirm both are meant.
geo2$egfrp <- as.numeric(geo2$EGFR_structure == "Positive")
dat <- geo2[which(geo2$egfrp == 1), ]

##### Numerator: EGFR-positive patients per tract
a <- table(dat$FIPS_tract)
b <- data.frame(a)
names(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c[["GEOID_Tract"]] <- as.character(c[["GEOID_Tract"]])

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state     = "NY",
  county    = c("061", "047", "081"),
  year      = 2020,
  sumfile   = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
pop1 <- setNames(pop[, c(1, 4)], c("GEOID_Tract", "Pop"))

# all.y = TRUE keeps every Census tract, with NA counts where no patient lives.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 517 2
## [1] 1840 2
## [1] 1840 3

# Rate per 100,000; tracts with Pop < 1000 (not estimated) and tracts with a
# missing rate are set to 0.
pop2$LCrate <- round(pop2$Current_Count / pop2$Pop * 1e5, 1)
pop2$LCrate[pop2$Pop < 1000 | is.na(pop2$LCrate)] <- 0
summary(pop2$LCrate) # per 100,000 <--- EGFR LC rate
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 5.76 0.00 133.20
# For comparison, the total LC rate summary recorded earlier was:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- left_join(tracts, geo3, by = "ID")

# Cap at 50 to match the colour-scale limits below; missing rates stay missing.
merged_data$LCrate <- pmin(merged_data$LCrate, 50)

ggplot(data = merged_data) +
  geom_sf(aes(fill = LCrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "mako", name = "Rates", limits = c(0, 50)) +
  theme_minimal() +
  labs(
    title = "EGFRm LC rates (per 100,000)",
    subtitle = "Source: MCC-MELD (Numerator) and Census (Denominator)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )
#
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "mako", name = "EGFRm LC rate/100,000", limits=c(0,100)) + # Gradient color scale
# #geom_point(data = points1, aes(x = long, y = lat), color = "green", size = 0.05, alpha = 0.7) +
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "EGFRm LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
#
#
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "mako", name = "EGFRm LC rate/100,000", limits=c(0,200)) + # Gradient color scale
# geom_point(data = points1, aes(x = long, y = lat), color = "orange", size = 0.03, alpha = 0.7) +
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "EGFRm LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
##### Ever smokers in MCC-MELD: tract-level rate per 100,000 residents
# `ever` is 0 when smkstat1 is "Negative", 1 for any other non-missing value,
# and NA when smkstat1 is missing (those rows are dropped by subset()).
# NOTE(review): the output recorded below (2883 tracts and a summary identical
# to the all-patient LC rate) indicates that this recode currently appears to
# flag every patient, i.e. "Negative" does not seem to occur in smkstat1.
# Confirm the coding of smkstat1. The check below makes a degenerate recode
# visible instead of silently reproducing the total LC map.
if (!any(geo2$smkstat1 == "Negative", na.rm = TRUE)) {
  warning(
    "No row has smkstat1 == \"Negative\"; every non-missing patient is ",
    "counted as an ever smoker. Check the coding of smkstat1.",
    call. = FALSE
  )
}
geo2$ever <- ifelse(geo2$smkstat1 == "Negative", 0, 1)
dat <- subset(geo2, ever == 1)

##### Numerator: ever smokers per census tract
a <- table(dat$FIPS_tract)
b <- data.frame(a)
colnames(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c$GEOID_Tract <- as.character(c$GEOID_Tract)

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state = "NY",
  county = c("061", "047", "081"),
  year = 2020,
  sumfile = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
pop1 <- pop[c(1, 4)]
colnames(pop1) <- c("GEOID_Tract", "Pop")

# all.y = TRUE keeps every Census tract; tracts without ever smokers get
# Current_Count = NA.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 2883 2
## [1] 1840 2
## [1] 1840 3

# Rate per 100,000; tracts with Pop < 1000 are not estimated and, like tracts
# with a missing rate, are set to 0.
pop2$EVERrate <- round(pop2$Current_Count / pop2$Pop * 100000, 1)
pop2$EVERrate[pop2$Pop < 1000] <- 0
pop2$EVERrate[is.na(pop2$EVERrate)] <- 0
summary(pop2$EVERrate) # per 100,000 <--- ever-smoker rate
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- tracts %>%
  left_join(geo3, by = "ID")
# ggplot(data = merged_data) +
# geom_sf(aes(fill = EVERrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "cividis", name = "Ever Smoking/100,000", limits=c(0,1000)) + # Gradient color scale
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "Ever smoking rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
# Cap EVERrate at 300 to match the colour-scale limits; missing values stay
# missing.
merged_data$EVERrate <- pmin(merged_data$EVERrate, 300)

# Choropleth of EVERrate by tract with county outlines drawn on top.
ggplot(data = merged_data) +
  geom_sf(aes(fill = EVERrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "cividis", name = "Rates", limits = c(0, 300)) +
  theme_minimal() +
  labs(
    title = "Ever smoking rates (per 100,000)",
    subtitle = "Source: MCC-MELD (Numerator) and Census (Denominator)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )
# PM2.5 predictions (column DS_PM_pred) keyed by census-tract FIPS (ctfips).
# The file holds several records per tract: 70498 rows for 4901 tracts.
pm <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/Archives/Book1.csv")
pm$fips <- as.character(pm$ctfips)
dim(pm) # [1] 70498 10
## [1] 70498 10
length(unique(pm$fips)) # [1] 4901
## [1] 4901

# First record per tract. Kept so that `pm1` is still defined, but it is no
# longer the input of the average below.
pm1 <- pm[!duplicated(pm[c("fips")]), ]

# Tract-level mean over ALL records of the tract. The previous version averaged
# the de-duplicated `pm1` (one row per tract), so the "mean" was just the first
# record's value. Summaries and maps recorded from earlier runs were produced
# with that first-record value and will differ after this fix.
pm2 <- pm %>%
  group_by(fips) %>%
  summarize(pm1 = mean(DS_PM_pred, na.rm = TRUE)) %>%
  ungroup()
pm2$FIPS_tract <- pm2$fips
### Plotting #####
# Attach the tract-level PM2.5 value (pm2$pm1) to the tract polygons by ID.
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
pm2$ID <- as.character(pm2$fips)
merged_data <- left_join(tracts, pm2, by = "ID")

# Tracts without a matching PM record show up as NA here.
summary(merged_data$pm1)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.642 10.397 10.572 10.588 10.785 11.228 223

# Choropleth of pm1 by tract with county outlines drawn on top.
ggplot(data = merged_data) +
  geom_sf(aes(fill = pm1), color = NA) + # tract polygons filled by PM2.5
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "inferno", name = "Range", limits = c(9.6, 11.3)) +
  theme_minimal() +
  labs(
    title = "Averaged PM2.5 in 2015",
    subtitle = "Source: CDC"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )
# Area Deprivation Index (ADI), 2022 release, census block-group level.
# Download: https://www.neighborhoodatlas.medicine.wisc.edu/download
# All 2022 latest data. We can decide what to use later.
adi <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/Archives/ADI/US_2022_ADI_Census_Block_Group_v4_0_1.csv")
adi$FIPS <- as.factor(adi$FIPS)
head(adi)
## GISJOIN FIPS ADI_NATRANK ADI_STATERNK
## 1 G01000100201001 10010201001 76 5
## 2 G01000100201002 10010201002 74 5
## 3 G01000100202001 10010202001 82 6
## 4 G01000100202002 10010202002 82 6
## 5 G01000100203001 10010203001 71 4
## 6 G01000100203002 10010203002 88 7

# Distribution of the national rank, including the non-numeric suppression
# codes. Recorded output: each rank 1-100 occurs 2359 or 2360 times, and the
# codes occur GQ = 2795, GQ-PH = 804, PH = 2561, QDI = 224, <NA> = 0.
table(adi$ADI_NATRANK, useNA = "always")

# Suppression Codes:
# GQ (Group Quarters) - Greater than 33.3% of Housing Units are Group Quarters
# PH (Population/Housing) - Population less than 100 and/or fewer than 30 housing units
# GQ-PH (Group Quarters and Population Housing) - Both GQ and PH conditions are met
# QDI (Questionable Data Integrity) - Block Groups missing a key demographic
#   factor for ADI construction (suppressed or missing in the ACS data)

# Replace every suppression code with NA in both rank columns.
adi$ADI_NATRANK[adi$ADI_NATRANK %in% c("GQ", "GQ-PH", "PH", "QDI")] <- NA
adi$ADI_STATERNK[adi$ADI_STATERNK %in% c("GQ", "GQ-PH", "PH", "QDI")] <- NA
# Patient-level area data merged onto the study cohort by pat_id.
geo <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/1_AreaData_20250212.csv")
geo2 <- merge(small, geo, by = "pat_id")
# head(geo2)

# Coerce both ADI rank columns to numeric (non-numeric entries become NA).
geo2[c("ADI_NATRANK", "ADI_STATERNK")] <-
  lapply(geo2[c("ADI_NATRANK", "ADI_STATERNK")], as.numeric)

# Join patient rows onto the tract polygons; GEOID is made character on both
# sides so the key types agree.
# NOTE(review): geo2 has one row per patient, so a tract shared by several
# patients appears once per patient in merged_data, and tracts without
# patients carry NA -- confirm this is intended for an area-level map.
tracts$GEOID <- as.character(tracts$GEOID)
geo2$GEOID <- as.character(geo2$GEOID)
merged_data <- left_join(tracts, geo2, by = "GEOID")
# merged_data$ADI_STATERNK <- as.factor(as.character(merged_data$ADI_STATERNK))

# Map 1: state ADI rank by tract, county outlines on top.
ggplot(data = merged_data) +
  geom_sf(aes(fill = ADI_STATERNK), color = NA) +
  geom_sf(data = counties, fill = NA, color = "black", size = 10) +
  scale_fill_viridis_c(option = "plasma", name = "ADI rank") +
  theme_minimal() +
  labs(
    title = "ADI (1: Least vs. 10: Most Deprived)",
    subtitle = "Source: Neighborhood Atlas"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )

# Map 2: Prop.ForeignBorn by tract on a fixed 0-1 colour scale.
ggplot(data = merged_data) +
  geom_sf(aes(fill = Prop.ForeignBorn), color = NA) +
  geom_sf(data = counties, fill = NA, color = "black", size = 10) +
  scale_fill_viridis_c(option = "plasma", name = "Foreign Born (%)", limits = c(0, 1)) +
  theme_minimal() +
  labs(
    title = "Prop (%) Foreign Born",
    subtitle = "Source: American Community Survey"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5, size = 16)
  )
# Patient-level area data merged onto the study cohort by pat_id.
geo <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/1_AreaData_20250212.csv")
geo2 <- merge(small, geo, by = "pat_id")
geo2$lat <- as.numeric(as.character(geo2$lat))
geo2$long <- as.numeric(as.character(geo2$long))

# County code as a zero-padded 3-character string. The previous version only
# padded 2-character codes ("61" -> "061"), so 1-character codes were left
# unpadded; now any code shorter than 3 characters is left-padded with zeros.
# Missing codes stay NA and codes of 3+ characters are unchanged.
geo2$county <- as.character(geo2$county)
geo2$county <- ifelse(
  is.na(geo2$county),
  NA_character_,
  paste0(strrep("0", pmax(0L, 3L - nchar(geo2$county))), geo2$county)
)

# Restore a leading zero that is lost when the identifiers are read as numbers.
geo2$FIPS_blockg <- ifelse(nchar(geo2$FIPS) == 11, paste0("0", geo2$FIPS), geo2$FIPS) # Block group (12 digits)
geo2$FIPS_tract <- ifelse(nchar(geo2$GEOID) == 10, paste0("0", geo2$GEOID), geo2$GEOID) # Census tract (11 digits)
# NOTE(review): this takes the first 9 characters of the unpadded GEOID; a
# state + county FIPS code would be the first 5 characters of FIPS_tract.
# Left unchanged because later code may depend on it -- confirm the intent.
geo2$FIPS_county <- substr(geo2$GEOID, 1, 9)

# Maps
nyc_counties <- c("061", "047", "081") # NYC FIPS codes, MCC catchment areas; Manhattan, Queens, Brooklyn
nyc_map <- map_data("county") %>%
  filter(region == "new york" & subregion %in% c("new york", "kings", "queens"))
tracts <- tigris::tracts(state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023)
counties <- tigris::counties(state = "NY", cb = TRUE, year = 2023) %>%
  filter(COUNTYFP %in% c("061", "081", "047")) # Filter for NYC counties

# Patient points in the catchment counties, split on EGFR_FINAL.
points1 <- subset(geo2, state == "36" & county %in% nyc_counties & EGFR_FINAL == "Positive") # EGFR_FINAL positive
points2 <- subset(geo2, state == "36" & county %in% nyc_counties & EGFR_FINAL != "Positive") # any other non-missing EGFR_FINAL
########################################################
##### Tract-level lung cancer (LC) rate, all MCC-MELD patients
# Numerator: number of cohort rows per census tract (geo2$FIPS_tract).
a <- table(geo2$FIPS_tract)
b <- data.frame(a)
names(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c[["GEOID_Tract"]] <- as.character(c[["GEOID_Tract"]])

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state     = "NY",
  county    = c("061", "047", "081"),
  year      = 2020,
  sumfile   = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
pop1 <- setNames(pop[, c(1, 4)], c("GEOID_Tract", "Pop"))

# all.y = TRUE keeps every Census tract; tracts without cohort patients get
# Current_Count = NA.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 2883 2
## [1] 1840 2
## [1] 1840 3

# Crude rate per 100,000. Tracts with Pop < 1000 are not estimated, and tracts
# with a missing rate are shown as 0; NA was also tried but looked confusing.
pop2$LCrate <- round(pop2$Current_Count / pop2$Pop * 1e5, 1)
pop2$LCrate[pop2$Pop < 1000 | is.na(pop2$LCrate)] <- 0
summary(pop2$LCrate) # per 100,000
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- left_join(tracts, geo3, by = "ID")

# Uncapped version of the map: the colour scale spans 0-1000.
ggplot(data = merged_data) +
  geom_sf(aes(fill = LCrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "rocket", name = "LC rate/100,000", limits = c(0, 1000)) +
  theme_minimal() +
  labs(
    title = "LC rates (per 100,000) by Census tract",
    subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
    caption = "Data Source: MCC-MELD and Census Bureau"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "rocket", name = "LC rate/100,000", limits=c(0,500)) + # Gradient color scale
# theme_minimal() +
# # geom_point(data = points2, aes(x = long, y = lat), color = "red", size = 0.08, alpha = 0.7) +
# # geom_point(data = points1, aes(x = long, y = lat), color = "green", size = 0.2, alpha = 0.7) +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
# Patient-level point layers (state "36", counties in `nyc_counties`), split on
# EGFR_FINAL; rows whose condition is NA are dropped, as subset() would do.
points1 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL == "Positive")),
]
points2 <- geo2[
  with(geo2, which(state == "36" & county %in% nyc_counties & EGFR_FINAL != "Positive")),
]
########################################################
##### Tract-level rate of EGFR-positive patients
# egfrp: 1 when EGFR_structure is "Positive", 0 otherwise, NA when missing.
# NOTE(review): the point layers above use EGFR_FINAL while this flag uses
# EGFR_structure -- confirm both are meant.
geo2$egfrp <- as.numeric(geo2$EGFR_structure == "Positive")
dat <- geo2[which(geo2$egfrp == 1), ]

##### Numerator: EGFR-positive patients per tract
a <- table(dat$FIPS_tract)
b <- data.frame(a)
names(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c[["GEOID_Tract"]] <- as.character(c[["GEOID_Tract"]])

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state     = "NY",
  county    = c("061", "047", "081"),
  year      = 2020,
  sumfile   = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
pop1 <- setNames(pop[, c(1, 4)], c("GEOID_Tract", "Pop"))

# all.y = TRUE keeps every Census tract, with NA counts where no patient lives.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 517 2
## [1] 1840 2
## [1] 1840 3

# Rate per 100,000; tracts with Pop < 1000 (not estimated) and tracts with a
# missing rate are set to 0.
pop2$LCrate <- round(pop2$Current_Count / pop2$Pop * 1e5, 1)
pop2$LCrate[pop2$Pop < 1000 | is.na(pop2$LCrate)] <- 0
summary(pop2$LCrate) # per 100,000 <--- EGFR LC rate
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 5.76 0.00 133.20
# For comparison, the total LC rate summary recorded earlier was:
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- left_join(tracts, geo3, by = "ID")

# Cap at 50 to match the colour-scale limits below; missing rates stay missing.
merged_data$LCrate <- pmin(merged_data$LCrate, 50)

ggplot(data = merged_data) +
  geom_sf(aes(fill = LCrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "plasma", name = "Range", limits = c(0, 50)) +
  theme_minimal() +
  labs(
    title = "EGFRm LC rates (per 100,000) in MCC Catchment Areas",
    subtitle = "Data Source: MCC-MELD (numerator) and Census Bureau (denominator)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
#
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "mako", name = "EGFRm LC rate/100,000", limits=c(0,100)) + # Gradient color scale
# #geom_point(data = points1, aes(x = long, y = lat), color = "green", size = 0.05, alpha = 0.7) +
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "EGFRm LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
# ggplot(data = merged_data) +
# geom_sf(aes(fill = LCrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "mako", name = "EGFRm LC rate/100,000", limits=c(0,200)) + # Gradient color scale
# geom_point(data = points1, aes(x = long, y = lat), color = "orange", size = 0.03, alpha = 0.7) +
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "EGFRm LC rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
##### Tract-level rate of patients whose EGFR_structure is not "Positive"
# Here egfrp is INVERTED relative to the EGFR-positive section: it is 1 when
# EGFR_structure is anything other than "Positive" and 0 when it is
# "Positive" (NA when missing). This overwrites the earlier geo2$egfrp.
# NOTE(review): any non-missing value other than "Positive" is counted as
# "without EGFR mutation" -- confirm that untested/unknown values, if the
# column has any, should be included.
geo2$egfrp <- as.numeric(geo2$EGFR_structure != "Positive")
dat <- geo2[which(geo2$egfrp == 1), ]

##### Numerator: selected patients per tract
a <- table(dat$FIPS_tract)
b <- data.frame(a)
names(b) <- c("GEOID_Tract", "Current_Count")
c <- b
c[["GEOID_Tract"]] <- as.character(c[["GEOID_Tract"]])

##### Denominator: 2020 decennial Census, variable P1_001N, by tract
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state     = "NY",
  county    = c("061", "047", "081"),
  year      = 2020,
  sumfile   = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
pop1 <- setNames(pop[, c(1, 4)], c("GEOID_Tract", "Pop"))

# all.y = TRUE keeps every Census tract, with NA counts where no patient lives.
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 2778 2
## [1] 1840 2
## [1] 1840 3

# Rate per 100,000; tracts with Pop < 1000 (not estimated) and tracts with a
# missing rate are set to 0.
pop2$LCrate <- round(pop2$Current_Count / pop2$Pop * 1e5, 1)
pop2$LCrate[pop2$Pop < 1000 | is.na(pop2$LCrate)] <- 0
summary(pop2$LCrate) # per 100,000 <--- rate of LC that is not EGFR positive
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 24.65 62.60 87.27 128.82 858.10
# For comparison, summaries recorded for the other maps:
# EGFR-positive rate: 0.00 0.00 0.00 5.76 0.00 133.20
# Total LC rate:      0.00 27.10 68.00 93.03 136.05 950.00
geo3 <- pop2

### Shape file #####
tracts <- tigris::tracts(
  state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
geo3$ID <- as.character(geo3$GEOID_Tract)
merged_data <- left_join(tracts, geo3, by = "ID")

# Cap at 500 to match the colour-scale limits below; missing rates stay missing.
merged_data$LCrate <- pmin(merged_data$LCrate, 500)

ggplot(data = merged_data) +
  geom_sf(aes(fill = LCrate), color = NA) + # tract polygons filled by rate
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "plasma", name = "Range", limits = c(0, 500)) +
  theme_minimal() +
  labs(
    title = "Rates of LC without EGFR mutation (per 100,000) in MCC Catchment Areas",
    subtitle = "Data Source: MCC-MELD (numerator) and Census Bureau (denominator)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
##### Number of Asian patients in MCC-MELD
# NOTE: the indicator column keeps its historical name `ever`, and the rate
# computed from it further down is called `EVERrate`, but both describe
# patients with Race == "Asian", not smoking status.
geo2$ever <- ifelse(geo2$Race == "Asian", 1, 0)
# subset() also drops rows where the indicator is NA (missing Race).
dat <- subset(geo2, ever == 1)
##### Count of these patients per census tract
a <- table(dat$FIPS_tract)
b <- data.frame(a)
colnames(b) <- c("GEOID_Tract", "Current_Count")
c <- b
# data.frame() on a table stores the tract IDs as a factor; the merge below
# should compare plain strings.
c$GEOID_Tract <- as.character(c$GEOID_Tract)
##### Get population count from Census survey
# P1_001N = total population in the 2020 PL 94-171 redistricting file.
pop_data <- get_decennial(
  geography = "tract",
  variables = "P1_001N",
  state = "NY", # Change to your state
  county = c("061", "047", "081"),
  year = 2020,
  sumfile = "pl"
)
## Getting data from the 2020 decennial Census
## Using the PL 94-171 Redistricting Data Summary File
pop <- data.frame(pop_data)
# Select the tract ID and the count by name rather than by position, so a
# change in the column layout fails loudly instead of silently picking the
# wrong columns.
pop1 <- pop[c("GEOID", "value")]
colnames(pop1) <- c("GEOID_Tract", "Pop")
# all.y = TRUE keeps every census tract, including tracts with no matching
# patients (their Current_Count is NA after the merge).
pop2 <- merge(c, pop1, by = "GEOID_Tract", all.y = TRUE)
dim(c)
dim(pop1)
dim(pop2)
## [1] 756 2
## [1] 1840 2
## [1] 1840 3
# [1] 2883 2
# [1] 1840 2
# [1] 1490 3
# Patients per 100,000 residents in each tract, rounded to one decimal.
# (`dat` was restricted to Race == "Asian" above, despite the EVERrate name.)
ever_rate <- round(pop2$Current_Count / pop2$Pop * 100000, 1)
# Tracts with fewer than 1,000 residents are not estimated; those, and tracts
# whose rate is missing (e.g. no patient count after the merge), are stored as 0.
ever_rate[pop2$Pop < 1000 | is.na(ever_rate)] <- 0
pop2$EVERrate <- ever_rate
summary(pop2$EVERrate) # distribution of the per-100,000 rate
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 0.00 20.94 24.80 441.50
# Min. 1st Qu. Median Mean 3rd Qu. Max.
# 0.00 27.10 68.00 93.03 136.05 950.00
# Work on a copy of the tract-level rates and give it a character join key.
geo3 <- pop2
geo3$ID <- as.character(geo3$GEOID_Tract)
### Shape file#####
# Cartographic-boundary tract polygons for the three catchment counties.
tracts <- tigris::tracts(
  state = "NY",
  county = c("061", "081", "047"),
  cb = TRUE,
  year = 2023
)
tracts$ID <- as.character(tracts$GEOID)
# Keep every polygon; tracts without a rate row get NA.
merged_data <- left_join(tracts, geo3, by = "ID")
# ggplot(data = merged_data) +
# geom_sf(aes(fill = EVERrate), color = NA) + # Use 'income' for the fill gradient
# scale_fill_viridis_c(option = "cividis", name = "Ever Smoking/100,000", limits=c(0,1000)) + # Gradient color scale
# theme_minimal() +
# geom_sf(data = counties, fill = NA, color = "black", size = 10) +
# labs(
# title = "Ever smoking rates (per 100,000) by Census tract",
# subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
# caption = "Data Source: MCC-MELD and Census Bureau"
# ) +
# theme(
# plot.title = element_text(hjust = 0.5),
# plot.subtitle = element_text(hjust = 0.5)
# )
# Choropleth of the tract-level EVERrate with county outlines drawn on top.
# Values above the 500 limit fall outside the fill scale.
ggplot(merged_data) +
  geom_sf(aes(fill = EVERrate), color = NA) + # fill by rate, no tract borders
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  scale_fill_viridis_c(option = "cividis", name = "Range", limits = c(0, 500)) +
  labs(
    title = "Prevalence of Asian patients (per 100,000) in MCC Catchment Areas",
    subtitle = "Data Source: MCC-MELD (numerator) and Census Bureau (denominator)"
    # caption = "Data Source: MCC-MELD and Census Bureau"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    plot.subtitle = element_text(hjust = 0.5) # centre the subtitle
  )
# Tract-level PM2.5 predictions. The file holds several rows per tract
# (70,498 rows for 4,901 distinct tract IDs, per the recorded output below).
pm <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/Archives/Book1.csv")
pm$fips <- as.character(pm$ctfips)
dim(pm) # [1] 70498 10
## [1] 70498 10
length(unique(pm$fips)) # [1] 4901
## [1] 4901
# First row of each tract, kept under its original name for any later use.
pm1 <- pm[!duplicated(pm[c("fips")]),]
# Average DS_PM_pred over ALL rows of each tract. The previous version
# summarised the de-duplicated `pm1`, where every tract has exactly one row,
# so the "mean" was just the first record of each tract.
pm2 <- pm %>%
  group_by(fips) %>%
  summarize(pm1 = mean(DS_PM_pred, na.rm = TRUE)) %>%
  ungroup()
pm2$FIPS_tract <- pm2$fips
### Plotting #####
tracts <- tigris::tracts(state = "NY", county = c("061", "081", "047"), cb = TRUE, year = 2023)
tracts$ID <- as.character(tracts$GEOID)
pm2$ID <- as.character(pm2$fips)
# Keep every tract polygon; tracts absent from the PM2.5 file get NA.
merged_data <- tracts %>%
  left_join(pm2, by = "ID")
summary(merged_data$pm1)
# NOTE: the summaries recorded below were produced before the per-tract
# averaging fix (first record per tract only); re-run to refresh them.
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.642 10.397 10.572 10.588 10.785 11.228 223
# Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
# 9.642 10.397 10.572 10.588 10.785 11.228 223
# Keep the original 9.6-11.3 legend range, but widen it when the averaged
# values fall outside it so no tract is dropped from the fill scale.
pm_limits <- range(c(9.6, 11.3, merged_data$pm1), na.rm = TRUE)
ggplot(data = merged_data) +
  geom_sf(aes(fill = pm1), color = NA) + # fill by mean PM2.5, no tract borders
  scale_fill_viridis_c(option = "inferno", name = "PM2.5 in 2016", limits = pm_limits) +
  theme_minimal() +
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  labs(
    title = "Daily Average PM2.5 by Census tract",
    subtitle = "MCC Catchment Areas: Manhattan, Queens, and Brooklyn",
    caption = "Data Source: CDC and Census Bureau"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# https://www.neighborhoodatlas.medicine.wisc.edu/download
# National block-group ADI file downloaded from the Neighborhood Atlas.
adi_file <- "/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/Archives/ADI/US_2022_ADI_Census_Block_Group_v4_0_1.csv"
adi <- read.csv(adi_file)
# All 2022 latest data. We can decide what to use later.
# NOTE(review): read.csv appears to parse FIPS as a number -- the recorded
# head() output shows 11-digit FIPS values next to GISJOIN codes for state
# "01" -- so leading zeros are already gone before this conversion. Confirm
# before matching on FIPS for states 01-09.
adi$FIPS <- factor(adi$FIPS)
head(adi, n = 6L)
## GISJOIN FIPS ADI_NATRANK ADI_STATERNK
## 1 G01000100201001 10010201001 76 5
## 2 G01000100201002 10010201002 74 5
## 3 G01000100202001 10010202001 82 6
## 4 G01000100202002 10010202002 82 6
## 5 G01000100203001 10010203001 71 4
## 6 G01000100203002 10010203002 88 7
# GISJOIN FIPS ADI_NATRANK ADI_STATERNK
# 1 G01000100201001 10010201001 76 5
# 2 G01000100201002 10010201002 74 5
# 3 G01000100202001 10010202001 82 6
# 4 G01000100202002 10010202002 82 6
# 5 G01000100203001 10010203001 71 4
# 6 G01000100203002 10010203002 88 7
# Tabulate the national rank values, including suppression codes and NAs.
table(adi[["ADI_NATRANK"]], useNA = "always")
##
## 1 10 100 11 12 13 14 15 16 17 18 19 2
## 2360 2360 2359 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360
## 20 21 22 23 24 25 26 27 28 29 3 30 31
## 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360
## 32 33 34 35 36 37 38 39 4 40 41 42 43
## 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360
## 44 45 46 47 48 49 5 50 51 52 53 54 55
## 2360 2360 2360 2360 2360 2360 2360 2360 2360 2360 2359 2359 2359
## 56 57 58 59 6 60 61 62 63 64 65 66 67
## 2359 2359 2359 2359 2360 2359 2359 2359 2359 2359 2359 2359 2359
## 68 69 7 70 71 72 73 74 75 76 77 78 79
## 2359 2359 2360 2359 2359 2359 2359 2359 2359 2359 2359 2359 2359
## 8 80 81 82 83 84 85 86 87 88 89 9 90
## 2360 2359 2359 2359 2359 2359 2359 2359 2359 2359 2359 2360 2359
## 91 92 93 94 95 96 97 98 99 GQ GQ-PH PH QDI
## 2359 2359 2359 2359 2359 2359 2359 2359 2359 2795 804 2561 224
## <NA>
## 0
# Suppression Codes:
# GQ (Group Quarters) - Greater than 33.3% of Housing Units are Group Quarters
# PH (Population/Housing) - Population less than 100 and/or fewer than 30 housing units
# GQ-PH (Group Quarters and Population Housing) - Both GQ and PH conditions are met
# QDI (Questionable Data Integrity) - Block Groups missing a key demographic factor for ADI construction (suppressed or missing in the ACS data)
# Replace the Neighborhood Atlas suppression codes with NA in both rank
# columns, so that only real ranks remain.
suppression_codes <- c("GQ", "GQ-PH", "PH", "QDI")
for (rank_col in c("ADI_NATRANK", "ADI_STATERNK")) {
  is_suppressed <- adi[[rank_col]] %in% suppression_codes
  adi[[rank_col]][is_suppressed] <- NA
}
# Area-level covariates keyed by patient.
geo <- read.csv("/Volumes/Shieh-share$/EGFR/cleaned/Risk factors/1_AreaData_20250212.csv")
# Inner merge: only pat_id values present in both tables are kept.
geo2 <- merge(x = small, y = geo, by = "pat_id")
#head(geo2)
# Coerce both ADI rank columns to numeric; values that are not numbers
# become NA.
for (rank_col in c("ADI_NATRANK", "ADI_STATERNK")) {
  geo2[[rank_col]] <- as.numeric(geo2[[rank_col]])
}
# Convert GEOID to character type for consistency
tracts$GEOID <- as.character(tracts$GEOID)
geo2$GEOID <- as.character(geo2$GEOID)
# Every tract polygon is kept. A tract matched by several geo2 rows appears
# once per matching row, so its geometry is repeated in merged_data.
merged_data <- left_join(tracts, geo2, by = "GEOID")
# Choropleth of the state ADI rank by tract with county outlines on top.
ggplot(merged_data) +
  geom_sf(aes(fill = ADI_STATERNK), color = NA) + # fill by state rank, no tract borders
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  labs(
    title = "ADI Gradient by Census Tract (10: Most Deprived)",
    subtitle = "Data Source: Neighborhood Atlas, and Census Bureau"
    # caption = "Data Source: MCC-MELD, Neighborhood Atlas, and Census Bureau"
  ) +
  scale_fill_viridis_c(option = "plasma", name = "State ADI") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    plot.subtitle = element_text(hjust = 0.5) # centre the subtitle
  )
# Choropleth of Prop.ForeignBorn by tract with county outlines on top.
# The fill scale is fixed to the 0-1 range.
ggplot(merged_data) +
  geom_sf(aes(fill = Prop.ForeignBorn), color = NA) + # no tract borders
  geom_sf(data = counties, fill = NA, color = "black", size = 10) + # county outlines
  labs(
    title = "Prop (%) Foreign Born",
    subtitle = "Data Source: Census Bureau"
    # caption = "Data Source: MCC-MELD and Census Bureau"
  ) +
  scale_fill_viridis_c(option = "plasma", name = "Foreign Born (%)", limits = c(0, 1)) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    plot.subtitle = element_text(hjust = 0.5) # centre the subtitle
  )